# delimit ;
* from this point on every command, and every star comment, ends at a semicolon ;
* start with no data in memory ;
clear ;
* do not pause the output for --more-- prompts ;
set more off ;
* interpret the commands below under Stata 12.0 rules ;
version 12.0 ;

* the relative ./data, ./code and ./output paths used below are resolved from this folder ;
cd "replication" ;

* ***************************************************************************** ;
* Carpena, Fenella, and Bilal Zia. "The causal mechanism of financial education: 
* Evidence from mediation analysis." Journal of Economic Behavior & Organization 
* 177 (2020): 143-184.
* 
* This do-file creates Appendix Table A9 of the paper
* ***************************************************************************** ;

* start from a clean session: no stored estimates, no user programs in memory ;
estimates drop _all ;
program drop _all ;

* make the ado-files shipped in the code folder visible to Stata ;
adopath + "`c(pwd)'/code/" ;

* ***************************************************************************** ;
* load the endline survey and attach the baseline survey ;
* ***************************************************************************** ;

use "./data/endline.dta", clear ;

* one record per id in both files ;
merge 1:1 id using "./data/baseline.dta" ;
tabulate _merge ;

* fix the label attached to the bank savings outcome ;
label variable savings_bank "has bank savings account" ;

* ***************************************************************************** ;
* sample selection
* ***************************************************************************** ;

* records found only in the baseline file have no endline data and are removed ;
keep if _merge != 2 ;
drop _merge ;

* a household stays only if at least one financial knowledge item is non-missing ;
* (rowmean returns missing only when every item in the range is missing) ;
egen temp_knowl = rowmean(numer_fin_return-attit_budget) ;
keep if temp_knowl != . ;
drop temp_knowl ;

* restrict to ids that also appear in the short-term financial knowledge file ;
* only the id is read from that file, so no variables are added ;
merge 1:1 id using "./data/financial-knowledge-short-term", keepusing(id) ;
tabulate _merge ;
drop if _merge != 3 ;
drop _merge ;

* the discount rate is a control variable, so it must be observed ;
keep if disc_rate != . ;

* ***************************************************************************** ;
* create financial knowledge scores
* ***************************************************************************** ;

* each score is the row mean of the items in its block of variables ;
* (rowmean ignores missing items) ;
egen end_numeracy  = rowmean(numer_fin_return-numer_int_rate) ;
egen end_awareness = rowmean(aware_budget-aware_unprod) ;
egen end_attitudes = rowmean(attit_suggest_ins-attit_budget) ;

* ***************************************************************************** ;
* mutually exclusive treatment-arm indicators ;
* ***************************************************************************** ;

* every arm is a combination of the three assignment flags ;
* an observation with a missing flag gets 0 in all five indicators ;
generate flcounsgoal = fin_ed_treatment == 1 & couns_treatment == 1 & goal_treatment == 1 ;
generate flcouns     = fin_ed_treatment == 1 & couns_treatment == 1 & goal_treatment == 0 ;
generate flgoal      = fin_ed_treatment == 1 & couns_treatment == 0 & goal_treatment == 1 ;
generate fl          = fin_ed_treatment == 1 & couns_treatment == 0 & goal_treatment == 0 ;
generate control     = fin_ed_treatment == 0 & couns_treatment == 0 & goal_treatment == 0 ;

* ***************************************************************************** ;
* pre-treatment controls ;
* ***************************************************************************** ;

* cvars holds the raw controls, rcvars the names of their residualised versions ;
* (the r-prefixed variables are created later, before the mediation models) ;
local cvars has_hard_time_saving interested_in_financial inconsistent disc_rate riskaverse ;
local rcvars rhas_hard_time_saving rinterested_in_financial rinconsistent rdisc_rate rriskaverse ;

* ***************************************************************************** ;
* holders for the estimates ;
* ***************************************************************************** ;

* results are written into the first rows of the working dataset: ;
* outcome, treatment, mediator and estimator labels plus the p-value ;
foreach holder in y t m estimator { ;
	generate `holder' = "" ;
} ;
generate pval = . ;

order y t m estimator pval ;

* index of the next free result row ;
local row = 1 ;

* ***************************************************************************** ;
* get ATE estimates and their p-values
* ***************************************************************************** ;

* one regression per treatment arm and outcome, each arm compared with the control group ;
* strata are absorbed and standard errors are clustered on wave_class ;
foreach arm in fl flgoal flcouns flcounsgoal { ;
	foreach dep of varlist budget_tried savings_bank loan_purpose_bus_educ ins_life { ;

		areg `dep' `arm' `cvars' if `arm' | control, absorb(strata) cluster(wave_class) ;

		* label the result row, there is no mediator for an ATE ;
		replace y = "`dep'" in `row' ;
		replace t = "`arm'" in `row' ;
		replace m = "." in `row' ;
		replace estimator = "ATE" in `row' ;

		* two-sided t-test on the treatment coefficient ;
		replace pval = 2*ttail(e(df_r), abs(_b[`arm']/_se[`arm'])) in `row' ;

		local ++row ;
	} ;
} ;

* ***************************************************************************** ;
* rename variable names to make them shorter ;
* ***************************************************************************** ;

* shorter names keep the r-prefixed residual variables created later compact ;
rename budget_helpful bdg_help ;
rename budget_tried bdg_tried ;
rename budget_regular bdg_reg ;
rename savings_informal sav_inf ;
rename savings_bank sav_bnk ;
rename loan_purpose_bus_educ loan_bus ;
* ins_life is already short and keeps its name, so no rename is issued for it ;

* ***************************************************************************** ;
* APPENDIX TABLE A9, PANELS A-D: ACME and ADE, one panel per treatment arm
*   Panel A: FL only            (fl)
*   Panel B: FL + goal          (flgoal)
*   Panel C: FL + couns         (flcouns)
*   Panel D: FL + couns + goal  (flcounsgoal)
* ***************************************************************************** ;

* outcomes analysed in each panel, keyed by the treatment dummy of that panel ;
local outcomes_fl bdg_tried ;
local outcomes_flgoal bdg_tried sav_bnk ;
local outcomes_flcouns bdg_tried sav_bnk loan_bus ins_life ;
local outcomes_flcounsgoal bdg_tried sav_bnk ins_life ;

* the panels are processed in table order so the result rows keep their sequence ;
foreach t in fl flgoal flcouns flcounsgoal { ;

	est drop _all ;

	local outcomes `outcomes_`t'' ;

	* remove strata FEs: regress each variable on the strata dummies among the ;
	* treated and control observations of this panel and keep the residual ;
	* resids collects the names created here so exactly those are dropped afterwards ;
	local resids ;
	foreach var of varlist `t' `outcomes' `cvars' end_numeracy end_awareness end_attitudes { ;
		reg `var' i.strata if `t' | control ;
		predict r`var', resid ;
		label var r`var' `var' ;
		local resids `resids' r`var' ;
	} ;

	* loop over all outcomes and mediators of this panel ;
	foreach y of local outcomes { ;
		foreach m of varlist end_numeracy end_awareness end_attitudes { ;

			di "*********** treatment: `t', outcome: `y', mediator: `m' ******************* " ;
			medeff_FC	(regress r`m' r`t' `rcvars') 
						(regress r`y' r`t' r`m' `rcvars')
						if `t' | control,
						treat(r`t') mediate(r`m') seed(24533) vce(cluster wave_class) sims(1000) ;

			* ACME p-value: two-sided normal test of r(acme) against r(acme_sd) ;
			replace y = "`y'" in `row' ;
			replace t = "`t'" in `row' ;
			replace m = "`m'" in `row' ;
			replace estimator = "ACME" in `row' ;
			replace pval = 2*(1-normal(abs(r(acme)/r(acme_sd)))) in `row' ;
			local ++row ;

			* ADE p-value: two-sided t-test on the treatment coefficient in e() ;
			* NOTE(review): presumably medeff_FC leaves the outcome regression in e() - confirm ;
			replace y = "`y'" in `row' ;
			replace t = "`t'" in `row' ;
			replace m = "`m'" in `row' ;
			replace estimator = "ADE" in `row' ;
			replace pval = 2*ttail(e(df_r),abs(_b[r`t']/_se[r`t'])) in `row' ;
			local ++row ;

		} ;
	} ;

	* discard the residuals of this panel before the next arm is residualised ;
	drop `resids' ;

} ;

* ***************************************************************************** ;
* FDR adjustment of p-values
* ***************************************************************************** ;

* keep only the stored estimates: the label columns and the p-values ;
keep y t m pval estimator ;
drop if pval == . ;

* The following code is adapted from Anderson (JASA, 2008) ;

* Collect the total number of p-values tested ;
quietly sum pval ;
local totalpvals = r(N) ;

* Sort the p-values in ascending order and generate a variable that codes the rank of each p-value ;
* original_sorting_order remembers the row order so it can be restored afterwards ;
quietly gen int original_sorting_order = _n ;
quietly sort pval ;
quietly gen int rank = _n if pval~=. ;

* Generate the variable that will contain the BH (1995) q-values ;
gen bh95_qval = 1 if pval~=. ;

* Check which hypotheses are rejected at q = 1.000, then at q = 0.999, then at
* q = 0.998, and so on down to q = 0.001. The grid is driven by an integer counter
* so that the number of passes and the value of q in each pass do not depend on
* accumulated floating-point error from repeatedly subtracting 0.001 ;

forvalues step = 1000(-1)1 { ;

	local qval = `step'/1000 ;

	* Generate value qr/M ;
	quietly gen fdr_temp = `qval'*rank/`totalpvals' ;

	* Generate binary variable checking condition p(r) <= qr/M ;
	quietly gen reject_temp = (fdr_temp>=pval) if fdr_temp~=. ;

	* Generate variable containing p-value ranks for all p-values that meet above condition ;
	quietly gen reject_rank = reject_temp*rank ;

	* Record the rank of the largest p-value that meets above condition ;
	quietly egen total_rejected = max(reject_rank) ;

	* A p-value is rejected at level q if its rank is at most the rank of the largest p-value meeting the condition ;
	* later passes use a smaller q, so each row ends with the smallest q at which it is still rejected ;
	replace bh95_qval = `qval' if rank <= total_rejected & rank~=. ;

	* Clear the scratch variables before the next, smaller q ;
	quietly drop fdr_temp reject_temp reject_rank total_rejected ;

} ;

* restore the order in which the estimates were stored ;
quietly sort original_sorting_order ;

* output to csv, with p-values rounded to three decimals after the q-values are computed ;
replace pval = round(pval, 0.001) ;
outsheet y t m estimator pval bh95_qval using "./output/appendix-table-A09.csv", comma names replace ;

exit ;
